The objective of this notebook is to refine the clustering annotation done at level 4. This refinement is the result of a manual curation carried out by specialists to remove poor quality cells, misclassified cells or clusters with very few cells.
library(Seurat)
library(Signac)
library(tidyverse)
library(reshape2)
library(ggpubr)
library(harmony)
library(unikn)
library(plyr)
set.seed(333)
cell_type = "PC"
path_to_obj <- str_c(
here::here("scATAC-seq/results/R_objects/level_4/"),
cell_type,
"/",
cell_type,
"_integration_peak_calling_level_4.rds",
sep = ""
)
path_to_save <- str_c(
here::here("scATAC-seq/results/R_objects/level_5/"),
cell_type,
"/01.",
cell_type,
"_integrated_MBC_GCBC_level_5.rds",
sep = ""
)
path_to_obj_RNA <- str_c(
here::here("scRNA-seq/3-clustering/5-level_5/"),
cell_type,
"/",
cell_type,
"_seu_obj_level_5_eta.rds",
sep = ""
)
path_to_obj_B_cell_lineage <- here::here("scATAC-seq/results/R_objects/B_cells_integrated.rds")
path_to_gcbc <- here::here("scATAC-seq/results/R_objects/level_4/GCBC/GCBC_integration_peak_calling_level_4.rds")
# Functions
source(here::here("scRNA-seq/bin/utils.R"))
seurat <- readRDS(path_to_obj)
seurat
## An object of class Seurat
## 81160 features across 1932 samples within 1 assay
## Active assay: peaks_redefined (81160 features, 76624 variable features)
## 3 dimensional reductions calculated: umap, lsi, harmony
DimPlot(seurat,
group.by = "annotation_level_5",
pt.size = 0.1)
seurat_GCBC <- readRDS(path_to_gcbc)
seurat_GCBC
## An object of class Seurat
## 142257 features across 21447 samples within 1 assay
## Active assay: peaks_redefined (142257 features, 142254 variable features)
## 3 dimensional reductions calculated: umap, lsi, harmony
DimPlot(seurat_GCBC,
pt.size = 0.1, label = T)
seurat_B_cell_lineage <- readRDS(path_to_obj_B_cell_lineage)
seurat_B_cell_lineage
## An object of class Seurat
## 270784 features across 64847 samples within 1 assay
## Active assay: peaks_macs (270784 features, 269800 variable features)
## 3 dimensional reductions calculated: umap, lsi, harmony
DimPlot(seurat_B_cell_lineage,
pt.size = 0.1, label = T)
seurat_RNA <- readRDS(path_to_obj_RNA)
DimPlot(seurat_RNA,
group.by = "names_level_5_clusters_eta",
label = T,
pt.size = 0.1) + NoLegend()
Due to the limited number of cells that we have in our scATAC dataset, we are going to merge related scRNAseq cells into larger groups to facilite cell annotation and interpretation.
seurat_RNA$annotation_level_5 <- revalue(seurat_RNA$names_level_5_clusters_eta,
c("Dark Zone GCBC"="Dark Zone GCBC",
"DZ migratory PC precursor"=NA,
"Light Zone GCBC"="Light Zone GCBC",
"PC committed Light Zone GCBC"="PC committed Light Zone GCBC",
"Early PC precursor"="Early PC precursor",
"PB committed early PC precursor"="PB",
"Transitional PB"="PB",
"PB"="PB",
"IgG+ PC precursor"="IgG+ PC precursor",
"preMature IgG+ PC"="preMature IgG+ PC",
"Mature IgG+ PC"="Mature PC",
"MBC derived IgG+ PC"="Mature PC",
"Mature IgA+ PC"="Mature PC",
"MBC derived IgA+ PC"="Mature PC",
"class switch MBC"="class switch MBC",
"MBC derived early PC precursor"="MBC derived PC precursor",
"MBC derived PC precursor"="MBC derived PC precursor",
"IgM+ early PC precursor"="Early PC precursor",
"IgM+ PC precursor"="IgM+ PC precursor",
"preMature IgM+ PC"="preMature IgM+ PC",
"Mature IgM+ PC"="Mature PC",
"Short lived IgM+ PC"=NA,
"IgD PC precursor"="IgD PC precursor"))
DimPlot(seurat_RNA,
group.by = "annotation_level_5",
label = T,
pt.size = 0.1)
general_counts <- table(seurat_RNA$annotation_level_5,
seurat_RNA$assay)
general_counts_melt <- melt(general_counts)
ggbarplot(data = general_counts_melt,
x = "Var1",
y = "value",
fill = "Var2",
color = "Var2",
palette = "Paired",
label = TRUE,
orientation = "horiz",
position = position_dodge(0.9))
seurat_only_multiome <- subset(seurat_RNA, assay == "multiome")
seurat_only_multiomes_melt <- melt(table(seurat_only_multiome$annotation_level_5,
seurat_only_multiome$age_group))
ggbarplot(data = seurat_only_multiomes_melt,
x = "Var1",
y = "value",
fill = "Var2",
color = "Var2",
palette = "Paired",
label = TRUE,
orientation = "horiz",
position = position_dodge(0.9))
DimPlot(seurat_B_cell_lineage,
cols.highlight = "darkred", cols= "grey",
cells.highlight= colnames(seurat_only_multiome),
pt.size = 0.1)
# seurat_B_cell_lineage@graphs$graphname
seurat_B_cell_lineage <- FindSubCluster(
seurat_B_cell_lineage,
cluster = 3,
graph.name = "peaks_macs_snn",
subcluster.name = "LZ",
resolution = 0.15)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 8315
## Number of edges: 276595
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9104
## Number of communities: 5
## Elapsed time: 0 seconds
DimPlot(seurat_B_cell_lineage,
group.by = "LZ",
label = T)
LZ_1 <- colnames(seurat_B_cell_lineage)[seurat_B_cell_lineage$LZ == "3_2" & seurat_B_cell_lineage$assay == "scATAC"]
LZ_1_m <- colnames(seurat_B_cell_lineage)[seurat_B_cell_lineage$LZ == "3_2" & seurat_B_cell_lineage$assay == "multiome"]
LZ_1_m_final <- LZ_1_m[which(LZ_1_m %in% colnames(seurat_only_multiome))]
LZ_2 <- colnames(seurat_B_cell_lineage)[seurat_B_cell_lineage$LZ == "3_3" & seurat_B_cell_lineage$assay == "scATAC"]
LZ_2_m <- colnames(seurat_B_cell_lineage)[seurat_B_cell_lineage$LZ == "3_3" & seurat_B_cell_lineage$assay == "multiome"]
LZ_2_m_final <- LZ_2_m[which(LZ_2_m %in% colnames(seurat_only_multiome))]
LZ_1.cells.use <- sample(x = c(LZ_1,LZ_1_m_final), size = 600)
LZ_2.cells.use <- sample(x = c(LZ_2,LZ_2_m_final), size = 600)
seurat_B_cell_lineage <- FindSubCluster(
seurat_B_cell_lineage,
cluster = 1,
graph.name = "peaks_macs_snn",
subcluster.name = "DZ",
resolution = 0.2)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 11405
## Number of edges: 360759
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9009
## Number of communities: 5
## Elapsed time: 0 seconds
DimPlot(seurat_B_cell_lineage,
group.by = "DZ",
label = T)
DZ_1 <- colnames(seurat_B_cell_lineage)[seurat_B_cell_lineage$DZ == "1_0" & seurat_B_cell_lineage$assay == "scATAC"]
DZ_1_m <- colnames(seurat_B_cell_lineage)[seurat_B_cell_lineage$DZ == "1_0" & seurat_B_cell_lineage$assay == "multiome"]
DZ_1_m_final <- DZ_1_m[which(DZ_1_m %in% colnames(seurat_only_multiome))]
DZ_1.cells.use <- sample(x = c(DZ_1,DZ_1_m_final), size = 600)
LZ_1.cells.use = subset(LZ_1.cells.use, !(LZ_1.cells.use %in% setdiff(LZ_1.cells.use,colnames(seurat_GCBC))))
LZ_2.cells.use = subset(LZ_2.cells.use, !(LZ_2.cells.use %in% setdiff(LZ_2.cells.use,colnames(seurat_GCBC))))
DZ_1.cells.use = subset(DZ_1.cells.use, !(DZ_1.cells.use %in% setdiff(DZ_1.cells.use,colnames(seurat_GCBC))))
seurat_B_cell_lineage <- FindSubCluster(
seurat_B_cell_lineage,
cluster = 2,
graph.name = "peaks_macs_snn",
subcluster.name = "csMBC",
resolution = 0.4)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 10806
## Number of edges: 312801
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8260
## Number of communities: 21
## Elapsed time: 0 seconds
DimPlot(seurat_B_cell_lineage,
group.by = "csMBC",
label = T)
csMBC <- colnames(seurat_B_cell_lineage)[seurat_B_cell_lineage$csMBC == "2_0" & seurat_B_cell_lineage$assay == "scATAC"]
csMBC_m <- colnames(seurat_B_cell_lineage)[seurat_B_cell_lineage$csMBC == "2_0" & seurat_B_cell_lineage$assay == "multiome"]
csMBC_m_final <- csMBC_m[which(csMBC_m %in% colnames(seurat_only_multiome))]
csMBC.cells.use <- sample(x = c(csMBC,csMBC_m_final), size = 600)
selecting_cells <- unique(c(colnames(seurat),
LZ_1.cells.use,
LZ_2.cells.use,
DZ_1.cells.use,
csMBC.cells.use))
DimPlot(seurat_B_cell_lineage,
cols.highlight = "brown1",
cols= "grey",
cells.highlight= selecting_cells,
pt.size = 0.1)
tonsil_RNA_annotation <- seurat_RNA@meta.data %>%
dplyr::filter(assay == "multiome") %>%
dplyr::select("barcode", "annotation_level_5","donor_id")
table(tonsil_RNA_annotation$donor_id)
##
## BCLL-14-T BCLL-15-T BCLL-2-T BCLL-8-T BCLL-9-T
## 193 212 1065 456 652
print(paste("Number of PC from scRNA-seq multiome:", sum(table(tonsil_RNA_annotation$annotation_level_5))))
## [1] "Number of PC from scRNA-seq multiome: 2521"
tonsil_ATAC_cell_barcode <- seurat@meta.data %>%
rownames_to_column(var = "cell_barcode") %>%
dplyr::filter(assay == "multiome") %>%
dplyr::select("cell_barcode","donor_id")
table(tonsil_ATAC_cell_barcode$donor_id)
##
## BCLL-14-T BCLL-15-T BCLL-8-T BCLL-9-T
## 68 87 237 261
print(paste("Number of PC from scATAC-seq multiome:", sum(table(tonsil_ATAC_cell_barcode$donor_id))))
## [1] "Number of PC from scATAC-seq multiome: 653"
length(intersect(tonsil_ATAC_cell_barcode$cell_barcode,tonsil_RNA_annotation$barcode))
## [1] 647
possible_doublets_ATAC <- setdiff(tonsil_ATAC_cell_barcode$cell_barcode,tonsil_RNA_annotation$barcode)
length(possible_doublets_ATAC)
## [1] 6
barcodes_filter_groups <- tonsil_RNA_annotation[which(is.na(tonsil_RNA_annotation$annotation_level_5)),]$barcode
cells <- selecting_cells[!(selecting_cells %in% c(possible_doublets_ATAC,barcodes_filter_groups))]
PC_level5 <- subset(seurat_B_cell_lineage,
cells = cells)
PC_level5$annotation_level_5 <- "unannotated"
# Renaming cells
PC_level5@meta.data[DZ_1.cells.use,]$annotation_level_5 <- "DZ_1"
PC_level5@meta.data[LZ_1.cells.use,]$annotation_level_5 <- "LZ_1"
PC_level5@meta.data[LZ_2.cells.use,]$annotation_level_5 <- "LZ_2"
PC_level5@meta.data[csMBC.cells.use,]$annotation_level_5 <- "csMBC"
PC_level5@meta.data[colnames(PC_level5),]$annotation_level_5 <- as.character(PC_level5@meta.data[colnames(PC_level5),]$annotation_level_5)
PC_level5 <- PC_level5 %>%
RunTFIDF() %>%
FindTopFeatures(min.cutoff = 10) %>%
RunSVD()
DepthCor(PC_level5)
PC_level5 <- RunUMAP(object = PC_level5,
reduction = 'lsi',
dims = 2:40)
DimPlot(PC_level5,
group.by = "annotation_level_5",
pt.size = 0.2)
DimPlot(PC_level5,
group.by = "assay",
pt.size = 0.2)
PC_level5 <- RunHarmony(
object = PC_level5,
dims = 2:40,
group.by.vars = 'gem_id',
reduction = 'lsi',
assay.use = 'peaks_macs',
project.dim = FALSE,
max.iter.harmony = 20
)
PC_level5 <- RunUMAP(PC_level5,
reduction = "harmony",
dims = 2:16)
DimPlot(PC_level5,
group.by = "annotation_level_5",
pt.size = 1)
DimPlot(PC_level5,
group.by = "annotation_level_5",
split.by = "assay",
pt.size = 0.2)
DimPlot(PC_level5,
split.by = "age_group",
pt.size = 0.2)
saveRDS(PC_level5, path_to_save)
sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: macOS Big Sur 10.16
##
## Matrix products: default
## BLAS/LAPACK: /Users/pauli/opt/anaconda3/envs/Motif_TF/lib/libopenblasp-r0.3.10.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] plyr_1.8.6 unikn_0.4.0 harmony_1.0 Rcpp_1.0.6 ggpubr_0.4.0 reshape2_1.4.4 forcats_0.5.0 stringr_1.4.0 dplyr_1.0.7 purrr_0.3.4 readr_1.4.0 tidyr_1.1.3 tibble_3.1.2 ggplot2_3.3.5 tidyverse_1.3.0 Signac_1.2.1 SeuratObject_4.0.2 Seurat_4.0.3 BiocStyle_2.16.1
##
## loaded via a namespace (and not attached):
## [1] utf8_1.2.1 reticulate_1.20 tidyselect_1.1.1 htmlwidgets_1.5.3 grid_4.0.3 docopt_0.7.1 BiocParallel_1.22.0 Rtsne_0.15 munsell_0.5.0 codetools_0.2-17 ica_1.0-2 future_1.21.0 miniUI_0.1.1.1 withr_2.4.2 colorspace_2.0-2 knitr_1.30 rstudioapi_0.11 stats4_4.0.3 ROCR_1.0-11 ggsignif_0.6.0 tensor_1.5 listenv_0.8.0 labeling_0.4.2 slam_0.1-47 GenomeInfoDbData_1.2.3 polyclip_1.10-0 farver_2.1.0 rprojroot_2.0.2 parallelly_1.26.1 vctrs_0.3.8 generics_0.1.0 xfun_0.18 lsa_0.73.2 ggseqlogo_0.1 R6_2.5.0 GenomeInfoDb_1.24.2 bitops_1.0-7 spatstat.utils_2.2-0 assertthat_0.2.1 promises_1.2.0.1 scales_1.1.1 gtable_0.3.0 globals_0.14.0 goftest_1.2-2 rlang_0.4.11 RcppRoll_0.3.0 splines_4.0.3 rstatix_0.6.0 lazyeval_0.2.2 spatstat.geom_2.2-0 broom_0.7.2
## [52] BiocManager_1.30.10 yaml_2.2.1 abind_1.4-5 modelr_0.1.8 backports_1.1.10 httpuv_1.6.1 tools_4.0.3 bookdown_0.21 ellipsis_0.3.2 spatstat.core_2.2-0 RColorBrewer_1.1-2 BiocGenerics_0.34.0 ggridges_0.5.3 zlibbioc_1.34.0 RCurl_1.98-1.2 rpart_4.1-15 deldir_0.2-10 pbapply_1.4-3 cowplot_1.1.1 S4Vectors_0.26.0 zoo_1.8-9 haven_2.3.1 ggrepel_0.9.1 cluster_2.1.0 here_1.0.1 fs_1.5.0 magrittr_2.0.1 RSpectra_0.16-0 data.table_1.14.0 scattermore_0.7 openxlsx_4.2.4 lmtest_0.9-38 reprex_0.3.0 RANN_2.6.1 SnowballC_0.7.0 fitdistrplus_1.1-5 matrixStats_0.59.0 hms_0.5.3 patchwork_1.1.1 mime_0.11 evaluate_0.14 xtable_1.8-4 rio_0.5.16 sparsesvd_0.2 readxl_1.3.1 IRanges_2.22.1 gridExtra_2.3 compiler_4.0.3 KernSmooth_2.23-17 crayon_1.4.1 htmltools_0.5.1.1
## [103] mgcv_1.8-33 later_1.2.0 lubridate_1.7.9 DBI_1.1.0 tweenr_1.0.1 dbplyr_1.4.4 MASS_7.3-53 Matrix_1.3-4 car_3.0-10 cli_3.0.0 parallel_4.0.3 igraph_1.2.6 GenomicRanges_1.40.0 pkgconfig_2.0.3 foreign_0.8-80 plotly_4.9.4.1 spatstat.sparse_2.0-0 xml2_1.3.2 XVector_0.28.0 rvest_0.3.6 digest_0.6.27 sctransform_0.3.2 RcppAnnoy_0.0.18 spatstat.data_2.1-0 Biostrings_2.56.0 rmarkdown_2.5 cellranger_1.1.0 leiden_0.3.8 fastmatch_1.1-0 uwot_0.1.10 curl_4.3.2 shiny_1.6.0 Rsamtools_2.4.0 lifecycle_1.0.0 nlme_3.1-150 jsonlite_1.7.2 carData_3.0-4 viridisLite_0.4.0 fansi_0.5.0 pillar_1.6.1 lattice_0.20-41 fastmap_1.1.0 httr_1.4.2 survival_3.2-7 glue_1.4.2 qlcMatrix_0.9.7 zip_2.1.1 png_0.1-7 ggforce_0.3.2 stringi_1.6.2 blob_1.2.1
## [154] irlba_2.3.3 future.apply_1.7.0